home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (C) 1993 Marc Stern (internet: stern@mble.philips.be) */
-
- #include "strings.h"
- #include <stdlib.h>
-
-
- /*
- Functions : matchset
- match
- recursexp
- regexp
- */
-
-
-
/***
 *  Function    :  matchset
 *
 *  Description :  Test if a character matches a set expression
 *                 (the text found between '[' and ']' in a pattern).
 *
 *  Parameters  :  in   char c          character to be matched
 *                 in   char *pattern   set expression, NUL-terminated;
 *                                      it is only read, never modified
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 \  quote next character      -  range of values
 *                 ^  non-inclusion (if first character)
 *
 *                 ex: aeiou0-9   match a, e, i, o, u, and 0 thru 9
 *                     ^aeiou0-9  match anything but a, e, i, o, u, and 0 thru 9
 *
 *                 A '-' that is the first or the last character of the
 *                 set, and a '\' that is the last character, stand for
 *                 themselves.  Range bounds may be given in either order
 *                 and are compared as unsigned char values.
 *
 *  Return      :  1 if c belongs to the set (or, with a leading '^',
 *                 does not belong to it), 0 otherwise.
 *                 Always 0 when c is '\0'.
 *
 *  OS/Compiler :  All
 ***/

int matchset( char c, char *pattern )
{
    const unsigned char uc = (unsigned char) c;
    const char *prev;             /* last set member examined: lower bound of a range */
    int match_ok = 1;

    if ( ! c ) return 0;

    if ( *pattern == '^' )
    {
        match_ok = 0;
        pattern++;
    }

    for ( prev = pattern; *pattern; prev = pattern++ )
    {
        /* A '-' is a range operator only when it has a member on both sides;
           otherwise it is an ordinary character handled below. */
        if ( *pattern == '-' && pattern != prev && pattern[1] != '\0' )
        {
            unsigned char lo = (unsigned char) *prev;
            unsigned char hi;

            pattern++;
            if ( *pattern == '\\' && pattern[1] != '\0' ) pattern++;
            hi = (unsigned char) *pattern;

            if ( lo > hi )        /* accept "z-a" as well as "a-z" */
            {
                unsigned char tmp = lo;
                lo = hi;
                hi = tmp;
            }

            if ( lo <= uc && uc <= hi ) return match_ok;

            /* The upper bound has been consumed as part of the range. */
            continue;
        }

        /* Never step over the terminator: a trailing '\' is a literal. */
        if ( *pattern == '\\' && pattern[1] != '\0' ) pattern++;

        if ( *pattern == c ) return match_ok;
    }

    return ! match_ok;
}
-
-
-
-
/***
 *  Function    :  match   (internal)
 *
 *  Description :  Returns the number of consecutive characters, at the
 *                 start of a string, that match the first one-character
 *                 regular expression of a pattern.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .  any character             \  quote next character
 *                 [] set of characters
 *
 *                 A '\' that is the last character of the pattern
 *                 stands for itself.
 *
 *  Parameters  :  in     char *string    input string to be matched
 *                 in/out char *pattern   regular expression to match
 *
 *  Side-effects:  pattern contents will be destroyed: a '\0' is written
 *                 over the last character of the first element (the '.',
 *                 the literal character, or the closing ']' of a set).
 *                 When a set has no closing ']' nothing is written and
 *                 the set extends to the end of the pattern.
 *
 *  Return      :  number of characters matched by the element
 *                 0 if not matched
 *
 *  OS/Compiler :  All
 ***/

static int match( const char *string, char *pattern )
{
    char *ptr;
    int length = 0;

    switch ( *pattern )
    {
        case '.' :
            *pattern = '\0';
            length = (int) strlen( string );
            break;

        case '[' :
            /* Look for the closing ']', i.e. one preceded by an even
               number of backslashes.  The backward scan always stops at
               the opening '[' at the latest. */
            for ( ptr = ++pattern; *ptr; ptr++ )
            {
                if ( *ptr == ']' )
                {
                    int backslashes = 0;

                    while ( *(ptr - backslashes - 1) == '\\' ) backslashes++;

                    if ( backslashes % 2 == 0 ) break;   /* not "\]" */
                }
            }

            if ( *ptr ) *ptr = '\0';

            /* matchset() returns 0 for '\0', so this stops at end of string. */
            while ( matchset( *string++, pattern ) ) length++;
            break;

        case '\\':
            /* Quote the next character, unless there is none. */
            if ( pattern[1] != '\0' ) pattern++;
            /* fall through */

        default :
            /* Stop at the end of the string even if the literal is '\0'. */
            while ( *string != '\0' && *string == *pattern )
            {
                string++;
                length++;
            }
            *pattern = '\0';
            break;
    }

    return length;
}
-
-
-
-
/***
 *  Function    :  recursexp
 *
 *  Description :  Returns the number of characters, at the start of a
 *                 string, matched by a regular expression.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .  any character             \  quote next character
 *                 *  match zero or more        +  match one or more
 *                 ?  match zero or one         [] set of characters
 *
 *                 Repetitions are tried longest first and shortened
 *                 until the rest of the pattern matches.
 *
 *  Parameters  :  in   char *string    input string to be matched
 *                 in   char *pattern   regular expression to match
 *                                      (a private copy is worked on)
 *
 *  Return      :  number of characters matched by regular expression
 *                 (0 for an empty pattern)
 *                 -1 if not matched, or if the copy cannot be allocated
 *
 *  OS/Compiler :  All
 ***/

int recursexp( const char *string, char *pattern )
{
    int count1, count2 = -1;
    int minone = 0;               /* least number of occurrences required */
    char *copy, *rest, *last;

    if ( ! *pattern ) return 0;

    copy = strdup( pattern );     /* match() destroys what it is given */
    if ( ! copy ) return -1;
    last = copy + strlen( copy ); /* terminator of the copy */

    count1 = match( string, copy );
    if ( count1 < 0 )
    {
        free( copy );
        return -1;
    }

    /* match() marks the end of the first element with a '\0'; what
       follows the marker is the rest of the pattern.  If the first '\0'
       found is the terminator itself, no marker was written (set without
       closing ']', or trailing '\'): the element then ends the pattern
       and we must not step beyond the buffer. */
    for ( rest = copy; *rest; rest++ )
        ;
    if ( rest < last ) rest++;

    switch ( *rest )
    {
        case '\0':                /* last element: exactly one occurrence */
            free( copy );
            return count1 ? 1 : -1;

        case '*':
            rest++;
            break;

        case '+':
            rest++;
            minone = 1;
            break;

        case '?':
            rest++;
            if ( count1 > 1 ) count1 = 1;
            break;

        default :                 /* no quantifier: exactly one occurrence */
            minone = 1;
            if ( count1 > 1 ) count1 = 1;
            break;
    }

    /* Give characters back one at a time, but never fewer than the
       element requires, until the rest of the pattern matches. */
    for ( ; count1 >= minone; count1-- )
    {
        count2 = recursexp( string + count1, rest );
        if ( count2 >= 0 ) break;
    }

    free( copy );

    if ( count2 < 0 ) return -1;

    return count1 + count2;
}
-
-
-
-
/***
 *  Function    :  regexp
 *
 *  Description :  Searches a string for the first substring matched by
 *                 a regular expression.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 ^  start of line             $  end of line
 *                 .  any character             \  quote next character
 *                 *  match zero or more        +  match one or more
 *                 ?  match zero or one         [] set of characters
 *
 *                 ex: [aeiou0-9]   match a, e, i, o, u, and 0 thru 9
 *                     [^aeiou0-9]  match anything but a, e, i, o, u, and 0 thru 9
 *
 *                 '^' is an anchor only as first character and '$' only
 *                 as last, unquoted character of the pattern.
 *                 Every position of the string is tried, including its
 *                 end, so a pattern that can match nothing (e.g. "^$")
 *                 matches an empty string.
 *
 *  Parameters  :  out  char *outstr    resulting string (may be NULL)
 *                 in   char *string    input string in which we search
 *                 in   char *pattern   regular expression to match
 *
 *  Return      :  - NULL if not found or if memory cannot be allocated
 *                   (outstr, when given, is set to an empty string)
 *                 - pointer to resulting string
 *                 - if ( outstr == NULL ) returns pointer to matched string
 *                   inside 'string'.
 *
 *  OS/Compiler :  All
 ***/

char *regexp( char *outstr, const char *string, const char *pattern )
{
    char *copy, *pattrn;
    int count = -1, begin = 0, end = 0, length;
    size_t patlen;

    copy = strdup( pattern );     /* working copy, so '$' can be removed */
    if ( ! copy )
    {
        if ( outstr ) *outstr = '\0';
        return NULL;
    }

    patlen = strlen( copy );
    if ( patlen > 0 && copy[patlen - 1] == '$' )   /* Match end of line ? */
    {
        /* The '$' is an anchor only when preceded by an even number
           of backslashes. */
        size_t backslashes = 0;

        while ( backslashes < patlen - 1 &&
                copy[patlen - 2 - backslashes] == '\\' )
            backslashes++;

        if ( backslashes % 2 == 0 )                /* Match end of line */
        {
            end = 1;
            copy[patlen - 1] = '\0';
        }
    }

    pattrn = copy;
    if ( *pattrn == '^' )                          /* Match begin of line */
    {
        begin = 1;
        pattrn++;
    }

    /* Try each start position in turn; 'length' is what is left of the
       string from the current position. */
    for ( length = (int) strlen( string ); ; string++, length-- )
    {
        count = recursexp( string, pattrn );
        if ( begin ) break;                        /* only valid as first characters */
        if ( count >= 0 && ( ! end || count == length ) ) break;
        if ( ! *string ) break;                    /* end of string also tried */
    }

    free( copy );

    if ( count < 0 ||
         (end && count != length)
       )
    {
        if ( outstr ) *outstr = '\0';
        return NULL;
    }

    if ( outstr )
    {
        strleft( outstr, string, count );
        return outstr;
    }

    return (char *) string;
}
-
-
#ifdef TEST

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Read one line from stdin into buffer (at most size - 1 characters) and
   strip the end-of-line.  Returns 0 at end of file or on read error. */
static int read_line( char *buffer, int size )
{
    if ( ! fgets( buffer, size, stdin ) ) return 0;

    buffer[ strcspn( buffer, "\r\n" ) ] = '\0';
    return 1;
}

/* Interactive test driver: repeatedly asks for a string and a pattern and
   prints the part of the string matched by regexp().  An empty string or
   end of input ends the program. */
int main( void )
{
    char string[255], pattern[255], result[255];

    for (;;) {
        printf( "\n String : " );
        if ( ! read_line( string, (int) sizeof string ) || ! *string ) return 0;
        printf( " Pattern: " );
        if ( ! read_line( pattern, (int) sizeof pattern ) ) return 0;

        /* regexp() leaves an empty string in result when nothing matches. */
        regexp( result, string, pattern );
        printf( "\n Result : %s\n\n", result );
    }
}

#endif
-